In [1]:
import cmasher as cmr
import colorcet as cc
import numpy as np
import matplotlib as mpl
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.ticker import MaxNLocator
import pandas as pd
import re
from scipy.optimize import curve_fit
import seaborn as sns
import string
In [2]:
# Load both source tables; survey rows containing any missing value are discarded.
corruption_data = pd.read_csv(r'CPI_data_2018.csv')
religiosity_data = pd.read_csv(r'Religiosity_Pew2018.csv').dropna(axis='index', how='any')

# Join the two tables on their shared "Country Name" column (default inner join,
# so only countries present in both files are kept).
corruption_religiosity = corruption_data.merge(religiosity_data, on="Country Name")

# Preview the merged table ordered by corruption index, then by religiosity.
# The sorted copy is only displayed; `corruption_religiosity` itself keeps its order.
# To show every row, uncomment: pd.options.display.max_rows = None
corruption_religiosity.sort_values(
    by=['Corruption Perception Index', 'Think religion is very important'],
    axis='index',
    ascending=True,
)
Out[2]:
Country Name Corruption Perception Index Religious Affiliation Attend weekly Pray daily Think religion is very important
41 Guinea Bissau 16 100 81.0 83.0 91
0 Afghanistan 16 100 61.0 96.0 92
97 Venezuela 18 93 26.0 47.0 67
46 Iraq 18 100 42.0 87.0 82
18 Chad 19 97 77.0 83.0 86
... ... ... ... ... ... ...
70 Norway 84 57 7.0 18.0 19
87 Switzerland 85 79 11.0 8.0 9
34 Finland 85 78 4.0 18.0 10
86 Sweden 85 58 6.0 11.0 10
26 Denmark 88 70 3.0 10.0 9

99 rows × 6 columns

Retrieve the Linear Fit and Coefficient of Determination (R²) of Two Variables¶

In [3]:
def regression_line(x, a, b):
    """Straight-line model used as the target function for curve fitting.

    Parameters
    ----------
    x : scalar or array-like
        Independent variable.
    a : scalar
        Slope of the line.
    b : scalar
        Intercept of the line.

    Returns
    -------
    The value(s) of ``a*x + b``.
    """
    scaled = a * x
    return scaled + b

# Fit the straight-line model in both directions: y as a function of x, and x as a function of y.
ykey_string = r'Think religion is very important'
x = corruption_religiosity['Corruption Perception Index'].to_numpy()
y = corruption_religiosity[ykey_string].to_numpy()

# curve_fit returns (optimal parameters, covariance matrix); parameters are ordered (slope, intercept).
params = curve_fit(regression_line, x, y)
inv_params = curve_fit(regression_line, y, x)
a, inv_a = params[0], inv_params[0]

# For least-squares lines, slope(y on x) * slope(x on y) equals the squared correlation, i.e. R^2.
r_squared = a[0] * inv_a[0]

# One-standard-deviation uncertainty of the fitted slope (first diagonal entry of the covariance).
error_density = np.sqrt(np.diag(params[1]))[0]

# Fitted line sampled at 100 points from 0 up to the largest observed x.
x_fit = np.linspace(0, x.max(), 100)
y_fit = regression_line(x_fit, a[0], a[1])

Plot the Relationship as a Scatter Plot¶

In [4]:
# Global theme: white axes with a black frame and a solid grey grid.
sns.set(style="ticks", rc={
    'axes.facecolor': 'white',
    'axes.edgecolor': 'black',
    'axes.grid': True,
    'grid.color': '#b0b0b0',
    'grid.linestyle': '-',
})
fig, ax = plt.subplots(figsize=(8,10), dpi=100)
# Both axes are fixed to 0-100. The limits are set before the regression is drawn because
# regplot(truncate=False) extends the fitted line to the x-axis limits.
ax.set_xlim(0,100)
ax.set_ylim(0,100)

# Plotting: one marker per country, coloured by the 'Pray daily' column.
scatter_religion = ax.scatter(
    'Corruption Perception Index', ykey_string, s=50, c='Pray daily', marker='o',
    cmap=cc.cm.CET_R2, vmin=0, vmax=100, alpha=1.0, linewidths=1.0,
    edgecolors='xkcd:dark grey', plotnonfinite=False, data=corruption_religiosity, zorder=4,
)
# Regression line with its confidence band; the points themselves were drawn above.
regplot_line_kws = {'linewidth':1.5, 'color':'xkcd:pale red', 'linestyle':'--', 'zorder':3}
sns_regplot = sns.regplot(data=corruption_religiosity, x="Corruption Perception Index", y=ykey_string,
                          ax=ax, scatter=False, truncate=False, line_kws=regplot_line_kws)
# Access the confidence interval band. It is the collection regplot added last, so it is
# looked up among the axes' collections rather than at a fixed position in get_children(),
# which would silently point at a different artist if e.g. another line were added first.
ci_plot = sns_regplot.collections[-1]
# Put the band at the same z-order as the fitted line: above the grid, below the markers.
ci_plot.set_zorder(3)

ax.grid(True, alpha=0.5, zorder=1)
ax.set_aspect('equal')
ax.set_xlabel('Indeks Persepsi Korupsi (0:korup, 100:bersih)', fontsize=14, fontfamily='Calibri', fontweight='bold', labelpad=8.0)
ax.set_ylabel('% orang yang berpikir "agama itu penting"', fontsize=14, fontfamily='Calibri', fontweight='bold')

# Colorbar: horizontal bar under the plot for the marker colours, with integer tick labels.
pray_colorbar = fig.colorbar(scatter_religion, location='bottom', orientation='horizontal', shrink=0.55, pad=0.13)
pray_colorbar.set_label('% orang yang beribadah setiap hari')
pray_colorbar.ax.xaxis.set_major_locator(MaxNLocator(integer=True))

# Title: headline plus the fitted line equation and R^2, rendered with mathtext.
# Slope and intercept are shown rounded to whole numbers; a slope that rounds to
# +1 or -1 is written as a bare "x" / "-x".
# Adding 0.0 turns a negative zero (e.g. from rounding -0.3) into plain 0, so the
# title never shows "-0".
slope_rounded = np.round(a[0], 0) + 0.0
intercept_rounded = np.round(a[1], 0) + 0.0

if slope_rounded == 1:
    slope_txt = 'x'
elif slope_rounded == -1:
    slope_txt = '-x'
else:
    slope_txt = '{:.0f}'.format(slope_rounded) + 'x'

# The intercept carries its own sign, so a negative intercept reads "x -5" rather than "x +-5".
intercept_txt = '{:+.0f}'.format(intercept_rounded)

title_txt = ('Level Religiusitas vs Indeks Persepsi Korupsi' + '\n'
             + r'$\mathrm{y = ' + slope_txt + ' ' + intercept_txt
             + r';\;\;\;R^2 =' + '{:.3f}'.format(r_squared) + r'}$')
ax.set_title(title_txt, fontfamily='Proxima Nova Alt', fontsize = 16, fontweight='bold')

def annotate_countries(ax, selected, y_column, side):
    """Label each country in *selected* next to its data point and mark the point with a dot.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    selected : pandas.DataFrame
        Rows to label; must contain the 'Country Name', 'Corruption Perception Index'
        and *y_column* columns.
    y_column : str
        Name of the column plotted on the y axis.
    side : {'right', 'left'}
        Side of the data point on which the label is placed.

    Returns
    -------
    list
        The Line2D objects created by ``ax.plot`` for the black marker dots.

    Raises
    ------
    ValueError
        If *side* is neither 'right' nor 'left'.
    """
    if side == 'right':
        x_offset, alignment = 5, 'left'
    elif side == 'left':
        x_offset, alignment = -5, 'right'
    else:
        raise ValueError("side must be 'right' or 'left', got {!r}".format(side))

    points = zip(selected['Country Name'], selected['Corruption Perception Index'], selected[y_column])
    for country, x_val, y_val in points:
        # Labels sit at zorder 2, i.e. below the regression line and the markers.
        ax.annotate(country, xy=(x_val, y_val), xycoords='data', xytext=(x_offset, -2), textcoords='offset points',
                    color='xkcd:black', fontstyle='italic', fontfamily='Segoe UI', fontsize=10,
                    backgroundcolor='white', zorder=2, horizontalalignment=alignment)

    # Small black dot on top of each labelled marker (zorder 5 is above the scatter at 4).
    return ax.plot('Corruption Perception Index', y_column, color='xkcd:black', marker='.', linewidth=0,
                   markersize=5, data=selected, zorder=5)


# Annotations to the right
countries_list_right = ['Indonesia', 'Botswana', 'Jordan', 'Senegal', 'Rwanda', 'Turkey',
                        'Serbia', 'Chile', 'Georgia', 'Costa Rica', 'Canada', 'Norway', 'Denmark',
                        'Bangladesh', 'Paraguay', 'Greece', 'Lithuania', 'Uruguay', 'Portugal',
                        'Latvia', 'Croatia']
# Names that do not occur in the merged table are simply not labelled.
select_countries_right = corruption_religiosity[corruption_religiosity['Country Name'].isin(countries_list_right)]
annotate_countries(ax, select_countries_right, ykey_string, side='right')

# Annotations to the left
countries_list_left = ['Ethiopia', 'Venezuela', 'Panama', 'South Korea', 'Azerbaijan', 'China', 'Russia',
                       'Uzbekistan', 'Iraq', 'Afghanistan', 'Pakistan', 'Mexico', 'Lebanon', 'Estonia',
                       'Czech Republic', 'Kazakhstan', 'Poland', 'Ireland', 'Tajikistan', 'Israel']
select_countries_left = corruption_religiosity[corruption_religiosity['Country Name'].isin(countries_list_left)]
annotate_countries(ax, select_countries_left, ykey_string, side='left')
Out[4]:
[<matplotlib.lines.Line2D at 0x2a535631c00>]
In [5]:
import plotly.express as px
In [6]:
# Interactive version of the scatter plot, built with Plotly Express.
# Every point is drawn as a marker with its country name centred above it,
# and hovering a point shows all columns of the merged table.
fig = px.scatter(
    corruption_religiosity,
    x='Corruption Perception Index',
    y='Think religion is very important',
    text='Country Name',
    hover_data=corruption_religiosity.columns,
    width=600,
    height=800,
).update_traces(textposition='top center', mode='markers+text')

# Render the figure
fig.show()